import requests
from bs4 import BeautifulSoup


# 爬取数据
def get_html(url):
    r = requests.get(url)
    r.encoding = "utf-8"
    return r.text


# Parse data
def parse_html(html_doc):
    """Extract every anchor tag from *html_doc*.

    Args:
        html_doc: An HTML document as a string.

    Returns:
        A list of dicts, one per <a> tag, each with keys
        "href" (the tag's href attribute, or None) and "text"
        (the tag's visible text).
    """
    soup = BeautifulSoup(html_doc, 'html.parser')
    return [
        {"href": anchor.get("href"), "text": anchor.text}
        for anchor in soup.find_all("a")
    ]


# Store data
def data_store(data_list):
    """Persist the scraped records.

    Placeholder implementation: prints each record to stdout.
    TODO: connect to a database and bulk-insert the records.

    Args:
        data_list: Iterable of dicts produced by parse_html.
    """
    for record in data_list:
        print(record)


if __name__ == "__main__":
    # Entry point: fetch the page, pull out the links, store them.
    target_url = "https://www.baidu.com"

    page = get_html(target_url)
    records = parse_html(page)
    data_store(records)